library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggbeeswarm)

twm <- read_csv("../data/taiwanese_mandarin_durations.csv") %>%
  arrange(speaker, file, phone_start) %>%
  mutate(recording_type = str_extract(file, "[rs]s$")) %>%
  filter(speaker != "mn_tw_66")
## Rows: 124703 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): speaker, sex, file, uttr_id, word, phone, previous, following, seg...
## dbl (16): age, speech_rate, speech_rate_phone, num_words_uttr, num_syllables...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

How much do speech rates vary?

twm %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, col=recording_type, y=speech_rate)) +
  facet_wrap(~ speaker) +
  geom_beeswarm() +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

twm %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, fill=recording_type, y=speech_rate)) +
  geom_violin() +
  stat_summary(fun=mean, geom="point", shape=16) +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

Some data filtering.

# utterance level filtering
# - at least 5 syllables
# - log sd based filtering for top end of speech rate

twm_uttr <- twm %>% 
  group_by(uttr_id, speaker) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
  group_by(speaker) %>%
  filter(log(speech_rate) < mean(log(speech_rate)) + 3*sd(log(speech_rate))) %>%
  ungroup()
## `summarise()` has grouped output by 'uttr_id'. You can override using the `.groups` argument.
twm <- twm %>%
  filter(
    num_syllables_uttr >= 5,
    uttr_id %in% twm_uttr$uttr_id
  )

Plot speech rates again.

twm %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, col=recording_type, y=speech_rate)) +
  facet_wrap(~ speaker) +
  geom_beeswarm() +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

twm %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, fill=recording_type, y=speech_rate)) +
  geom_violin() +
  stat_summary(fun=mean, geom="point", shape=16) +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

Right, calculate %V!

twm_rhythm <- twm %>%
  group_by(
    speaker, 
    sex,
    age,
    recording_type,
    uttr_id
  ) %>%
  summarise(
    speech_rate=num_syllables_uttr[1]/sum(phone_dur[segment_type %in% c("vowel", "consonant")]),
    speech_rate_phone=length(phone_dur[segment_type %in% c("vowel", "consonant")])/sum(phone_dur[segment_type %in% c("vowel", "consonant")]),
    num_words_uttr=num_words_uttr[1],
    num_syllables_uttr=num_syllables_uttr[1],
    uttr_start=uttr_start[1],
    uttr_end=uttr_end[1],
    uttr_dur=uttr_dur[1],
    v_prop=sum(phone_dur[segment_type=="vowel"])/sum(phone_dur[segment_type %in% c("vowel", "consonant")]),
    v_dur=mean(phone_dur[segment_type=="vowel"]),
    c_dur=mean(phone_dur[segment_type=="consonant"])
  ) %>%
  ungroup() %>%
  filter((1/speech_rate_phone) < 0.2)
## `summarise()` has grouped output by 'speaker', 'sex', 'age', 'recording_type'. You can override using the `.groups` argument.

Exploratory plots!

ggplot(twm_rhythm, aes(x=1/speech_rate_phone, y=v_prop*100)) +
  geom_point() +
  geom_smooth(method="gam") +
  ylab("V%") +
  xlab("average segment duration (1 / speech rate)")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/twm_avg_seg_dur-v_perc.png", width=6, height=4.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
ggplot(twm_rhythm, aes(x=1/speech_rate_phone, y=v_prop*100)) +
  facet_wrap(~speaker) +
  geom_point() +
  geom_smooth(method="gam") +
  ylab("V%") +
  xlab("average segment duration (1 / speech rate)")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/twm_avg_seg_dur-v_perc-speaker.png", width=10, height=7.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
ggplot(twm_rhythm, aes(x=1/speech_rate_phone)) +
  geom_point(aes(y=v_dur), col="orange", alpha=0.3) +
  geom_point(aes(y=c_dur), col="purple", alpha=0.3) +
  geom_smooth(aes(y=v_dur), col="darkorange2", method="gam", se=F) +
  geom_smooth(aes(y=c_dur), col="purple4", method="gam", se=F) +
  ylab("average duration in s")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/twm_avg_seg_dur-avg_C+V_dur.png", width=6, height=4.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
ggplot(twm_rhythm, aes(x=1/speech_rate_phone)) +
  facet_wrap(~speaker) +
  geom_point(aes(y=v_dur), col="orange", alpha=0.3) +
  geom_point(aes(y=c_dur), col="purple", alpha=0.3) +
  geom_smooth(aes(y=v_dur), col="darkorange2", method="gam", se=F) +
  geom_smooth(aes(y=c_dur), col="purple4", method="gam", se=F) +
  ylab("average duration in s")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/twm_avg_seg_dur-avg_C+V_dur-speaker.png", width=10, height=7.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

Korean

library(tidyverse)
library(ggbeeswarm)

k <- read_csv("../data/korean_durations.csv") %>%
  arrange(speaker, file, phone_start) %>%
  mutate(recording_type = str_extract(file, "[rs]s$"))
## Rows: 154202 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): speaker, sex, file, uttr_id, word, phone, previous, following, seg...
## dbl (16): age, speech_rate, speech_rate_phone, num_words_uttr, num_syllables...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

How much do speech rates vary?

k %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, col=recording_type, y=speech_rate)) +
  facet_wrap(~ speaker) +
  geom_beeswarm() +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

k %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, fill=recording_type, y=speech_rate)) +
  geom_violin() +
  stat_summary(fun=mean, geom="point", shape=16) +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

Some data filtering.

# utterance level filtering
# - at least 5 syllables
# - log sd based filtering for top end of speech rate

k_uttr <- k %>% 
  group_by(uttr_id, speaker) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
  group_by(speaker) %>%
  filter(log(speech_rate) < mean(log(speech_rate)) + 3*sd(log(speech_rate))) %>%
  ungroup()
## `summarise()` has grouped output by 'uttr_id'. You can override using the `.groups` argument.
k <- k %>%
  filter(
    num_syllables_uttr >= 5,
    uttr_id %in% k_uttr$uttr_id
  )

Plot speech rates again.

k %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, col=recording_type, y=speech_rate)) +
  facet_wrap(~ speaker) +
  geom_beeswarm() +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

k %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, fill=recording_type, y=speech_rate)) +
  geom_violin() +
  stat_summary(fun=mean, geom="point", shape=16) +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

Right, calculate %V!

k_rhythm <- k %>%
  group_by(
    speaker, 
    sex,
    age,
    recording_type,
    uttr_id
  ) %>%
  summarise(
    speech_rate=num_syllables_uttr[1]/sum(phone_dur[segment_type %in% c("vowel", "consonant")]),
    speech_rate_phone=length(phone_dur[segment_type %in% c("vowel", "consonant")])/sum(phone_dur[segment_type %in% c("vowel", "consonant")]),
    num_words_uttr=num_words_uttr[1],
    num_syllables_uttr=num_syllables_uttr[1],
    uttr_start=uttr_start[1],
    uttr_end=uttr_end[1],
    uttr_dur=uttr_dur[1],
    v_prop=sum(phone_dur[segment_type=="vowel"])/sum(phone_dur[segment_type %in% c("vowel", "consonant")]),
    v_dur=mean(phone_dur[segment_type=="vowel"]),
    c_dur=mean(phone_dur[segment_type=="consonant"])
  ) %>%
  ungroup() %>%
  filter((1/speech_rate_phone) < 0.2)
## `summarise()` has grouped output by 'speaker', 'sex', 'age', 'recording_type'. You can override using the `.groups` argument.

Exploratory plots!

ggplot(k_rhythm, aes(x=1/speech_rate_phone, y=v_prop*100)) +
  geom_point() +
  geom_smooth(method="gam") +
  ylab("V%") +
  xlab("average segment duration (1 / speech rate)")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/k_avg_seg_dur-v_perc.png", width=6, height=4.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
ggplot(k_rhythm, aes(x=1/speech_rate_phone, y=v_prop*100)) +
  facet_wrap(~speaker) +
  geom_point() +
  geom_smooth(method="gam") +
  ylab("V%") +
  xlab("average segment duration (1 / speech rate)")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/k_avg_seg_dur-v_perc-speaker.png", width=10, height=7.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
ggplot(k_rhythm, aes(x=1/speech_rate_phone)) +
  geom_point(aes(y=v_dur), col="orange", alpha=0.3) +
  geom_point(aes(y=c_dur), col="purple", alpha=0.3) +
  geom_smooth(aes(y=v_dur), col="darkorange2", method="gam", se=F) +
  geom_smooth(aes(y=c_dur), col="purple4", method="gam", se=F) +
  ylab("average duration in s")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/k_avg_seg_dur-avg_C+V_dur.png", width=6, height=4.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
ggplot(k_rhythm, aes(x=1/speech_rate_phone)) +
  facet_wrap(~speaker) +
  geom_point(aes(y=v_dur), col="orange", alpha=0.3) +
  geom_point(aes(y=c_dur), col="purple", alpha=0.3) +
  geom_smooth(aes(y=v_dur), col="darkorange2", method="gam", se=F) +
  geom_smooth(aes(y=c_dur), col="purple4", method="gam", se=F) +
  ylab("average duration in s")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/k_avg_seg_dur-avg_C+V_dur-speaker.png", width=10, height=7.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

Pampango

For now, looking at read speech only.

library(tidyverse)
library(ggbeeswarm)

p <- read_csv("../data/kapampangan_durations.csv") %>%
  arrange(speaker, file, phone_start) %>%
  mutate(recording_type = str_extract(file, "[rs]s$"))
## Rows: 120232 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): speaker, sex, file, uttr_id, word, phone, previous, following, seg...
## dbl (16): age, speech_rate, speech_rate_phone, num_words_uttr, num_syllables...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

How much do speech rates vary?

p %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, col=recording_type, y=speech_rate)) +
  facet_wrap(~ speaker) +
  geom_beeswarm() +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

p %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, fill=recording_type, y=speech_rate)) +
  geom_violin() +
  stat_summary(fun=mean, geom="point", shape=16) +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

Some data filtering.

# utterance level filtering
# - at least 5 syllables
# - log sd based filtering for top end of speech rate

p_uttr <- p %>% 
  group_by(uttr_id, speaker) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
  group_by(speaker) %>%
  filter(log(speech_rate) < mean(log(speech_rate)) + 3*sd(log(speech_rate))) %>%
  ungroup()
## `summarise()` has grouped output by 'uttr_id'. You can override using the `.groups` argument.
p <- p %>%
  filter(
    num_syllables_uttr >= 5,
    uttr_id %in% p_uttr$uttr_id
  )

Plot speech rates again.

p %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, col=recording_type, y=speech_rate)) +
  facet_wrap(~ speaker) +
  geom_beeswarm() +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

p %>%
  group_by(speaker, recording_type, uttr_id) %>%
  summarise(speech_rate=speech_rate[1]) %>%
  ungroup() %>%
ggplot(aes(x=recording_type, fill=recording_type, y=speech_rate)) +
  geom_violin() +
  stat_summary(fun=mean, geom="point", shape=16) +
  scale_y_log10()
## `summarise()` has grouped output by 'speaker', 'recording_type'. You can override using the `.groups` argument.

Right, calculate %V!

p_rhythm <- p %>%
  group_by(
    speaker, 
    sex,
    age,
    recording_type,
    uttr_id
  ) %>%
  summarise(
    speech_rate=num_syllables_uttr[1]/sum(phone_dur[segment_type %in% c("vowel", "consonant")]),
    speech_rate_phone=length(phone_dur[segment_type %in% c("vowel", "consonant")])/sum(phone_dur[segment_type %in% c("vowel", "consonant")]),
    num_words_uttr=num_words_uttr[1],
    num_syllables_uttr=num_syllables_uttr[1],
    uttr_start=uttr_start[1],
    uttr_end=uttr_end[1],
    uttr_dur=uttr_dur[1],
    v_prop=sum(phone_dur[segment_type=="vowel"])/sum(phone_dur[segment_type %in% c("vowel", "consonant")]),
    v_dur=mean(phone_dur[segment_type=="vowel"]),
    c_dur=mean(phone_dur[segment_type=="consonant"]),
    v_count_prop=length(phone_dur[segment_type=="vowel"])/length(phone_dur)
      ) %>%
  ungroup() %>%
  filter((1/speech_rate_phone) < 0.2)
## `summarise()` has grouped output by 'speaker', 'sex', 'age', 'recording_type'. You can override using the `.groups` argument.

Exploratory plots!

ggplot(p_rhythm, aes(x=1/speech_rate_phone, y=v_prop*100)) +
  geom_point() +
  geom_smooth(method="gam") +
  ylab("V%") +
  xlab("average segment duration (1 / speech rate)")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/p_avg_seg_dur-v_perc.png", width=6, height=4.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
ggplot(p_rhythm, aes(x=1/speech_rate_phone, y=v_prop*100)) +
  facet_wrap(~speaker) +
  geom_point() +
  geom_smooth(method="gam") +
  ylab("V%") +
  xlab("average segment duration (1 / speech rate)")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/p_avg_seg_dur-v_perc-speaker.png", width=10, height=7.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
ggplot(p_rhythm, aes(x=1/speech_rate_phone)) +
  geom_point(aes(y=v_dur), col="orange", alpha=0.3) +
  geom_point(aes(y=c_dur), col="purple", alpha=0.3) +
  geom_smooth(aes(y=v_dur), col="darkorange2", method="gam", se=F) +
  geom_smooth(aes(y=c_dur), col="purple4", method="gam", se=F) +
  ylab("average duration in s")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/p_avg_seg_dur-avg_C+V_dur.png", width=6, height=4.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
ggplot(p_rhythm, aes(x=1/speech_rate_phone)) +
  facet_wrap(~speaker) +
  geom_point(aes(y=v_dur), col="orange", alpha=0.3) +
  geom_point(aes(y=c_dur), col="purple", alpha=0.3) +
  geom_smooth(aes(y=v_dur), col="darkorange2", method="gam", se=F) +
  geom_smooth(aes(y=c_dur), col="purple4", method="gam", se=F) +
  ylab("average duration in s")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'

ggsave("graphs/p_avg_seg_dur-avg_C+V_dur-speaker.png", width=10, height=7.5, dpi=300)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'